1861 Hagelloch Measles

Data references:

  • Pfeilsticker, A. 1863. Beiträge zur Pathologie der Masern mit besonderer Berücksichtigung der statistischen Verhältnisse, M.D. Thesis, Eberhard-Karls-Universität Tübingen. Available as http://www.archive.org/details/beitrgezurpatho00pfeigoog.
  • Oesterle, H. 1992. Statistische Reanalyse einer Masernepidemie 1861 in Hagelloch, M.D. Thesis, Eberhard-Karls-Universitäat Tübingen.
  • Höhle M. 2007. surveillance: An R package for the monitoring of infectious diseases. Computational Statistics, 22:571-582.

In [1]:
using CSV, DelimitedFiles, Distances, Random, Pathogen, Plots, Plots.PlotMeasures, DataFrames;
Random.seed!(4321);

In [2]:
# POPULATION INFOFORMATION
# Use CSV.jl for DataFrames I/O
risks = CSV.read("data/measles_hagelloch_1861_risk_factors.csv")


Out[2]:

188 rows × 6 columns

agegenderfamily_IDclassxy
Int64StringInt64Int64Float64Float64
17f411142.5100.0
26f411142.5100.0
34f410142.5100.0
413m612165.0102.5
58f421145.0120.0
612m422145.0120.0
76m260272.5147.5
810m44197.5155.0
913m44297.5155.0
107f291240.075.0
1111f272270.0135.0
127f321195.027.5
1313m322195.027.5
1413f222227.5185.0
158m221227.5185.0
1615f432172.5172.5
1710f432172.5172.5
182f430172.5172.5
1911m112167.55.0
2010m112167.55.0
2113f112167.55.0
2210f351167.55.0
237f351167.55.0
244m350167.55.0
2512f352167.55.0
267m6717.537.5
275m290240.075.0
2810f65215.047.5
2913m152125.0187.5
3011f152125.0187.5
&vellip&vellip&vellip&vellip&vellip&vellip&vellip

In [3]:
# Will precalculate distances
distance = [euclidean([risks[i, :x]; risks[i, :y]], [risks[j, :x]; risks[j, :y]]) for i = 1:size(risks, 1), j = 1:size(risks, 1)]
temp1 = [prod(risks[[i, j], :class]) for i = 1:size(risks, 1), j = 1:size(risks, 1)]
sameclass = temp1 .∈ Ref([1, 4])
samehousehold = distance .== 0.0
distance[samehousehold] .= Inf
dist = [(distance[i, j], sameclass[i, j], samehousehold[i, j]) for i = 1:size(risks, 1), j = 1:size(risks, 1)]

pop = Population(risks, dist)


Out[3]:
Population object (n=188)

In [4]:
# Define risk functions/TN-ILM structure
function _constant(params::Vector{Float64}, pop::Population, i::Int64)
  return params[1]
end

function _one(params::Vector{Float64}, pop::Population, i::Int64)
  return 1.0
end

function _powerlaw_plus(params::Vector{Float64}, pop::Population, i::Int64, k::Int64)
  return params[1] * pop.distances[k, i][1]^(-params[2]) +
         params[3] * pop.distances[k, i][2] +
         params[4] * pop.distances[k, i][3]
end

rf = RiskFunctions{SEIR}(_constant, # sparks function
                         _one, # susceptibility function
                         _powerlaw_plus, # infectivity function
                         _one, # transmissability function
                         _constant, # latency function
                         _constant) # removal function


Out[4]:
SEIR model risk functions

In [5]:
obsdata = CSV.read("data/measles_hagelloch_1861_observations.csv")

# Set the removal observation as minimum of (day that rash appears + 4.0) and death, in fatal cases.
removed = [obsdata[i, :death] === NaN ? obsdata[i, :rash] + 4.0 : min(obsdata[i, :rash] + 4.0, obsdata[i, :death]) for i = 1:188]

# Set prodrome within first 7 days of epidemic as initial conditions
infected = obsdata[:, :prodrome]
# starting_states = [i <= 10.0 ? State_I : State_S for i in infected]
# infected[infected .<= 10.0] .= -Inf
obs = EventObservations{SEIR}(infected, removed)


Out[5]:
SEIR model observations (n=188)

In [6]:
# Specify some priors for the risk parameters of our various risk functions
rpriors = RiskPriors{SEIR}([Uniform(0.0, 0.1)],
                           UnivariateDistribution[],
                           [Uniform(0.0, 7.0); Uniform(0.0, 7.0); Uniform(0.0, 1.0); Uniform(0.0, 1.0)],
                           UnivariateDistribution[],
                           [Uniform(0.0, 1.0)],
                           [Uniform(0.0, 1.0)])

# Using CDC measles information set some extents for event data augmentation
# Exposure up to 2 weeks before infectiousness, with a minimum of 5 days between exposure and infectiousness
# Infectious up to 3 days before prodrome
# Removal time within 2-4 days after rash
ee = EventExtents{SEIR}((5.0, 14.0), 3.0, 2.0)


Out[6]:
SEIR model event extents

In [7]:
# Initialize MCMC
mcmc = MCMC(obs, ee, pop, rf, rpriors)
start!(mcmc, attempts=100000) # 1 chain, with 100k initialization attempts

# Run MCMC
iterate!(mcmc, 200000, 1.0, condition_on_network=true, event_batches=10)


Initialization progress100%|████████████████████████████| Time: 0:14:35:37
MCMC progress100%|██████████████████████████████████████| Time: 3:52:59m26m
Out[7]:
SEIR model MCMC with 1 chains

In [7]:
using JLD2;
#@save "mcmc.jld2" mcmc;
@load "mcmc.jld2" mcmc;

In [9]:
gr(dpi=200); # GR backend with DPI=200

In [10]:
# MCMC and posterior plots
p1 = plot(1:20:200001, mcmc.markov_chains[1].risk_parameters, yscale=:log10, title="TN-ILM parameters")
png(p1, joinpath(@__DIR__, "trace.png"))


In [11]:
p2 = plot(mcmc.markov_chains[1].events[100000], State_S,
          linealpha=0.01, title="S", xguidefontsize=8, yguidefontsize=8,
          xtickfontsize=7, ytickfontsize=7, titlefontsize=11)
for i=100050:50:200000
  plot!(p2, mcmc.markov_chains[1].events[i], State_S, linealpha=0.02)
end

p3 = plot(mcmc.markov_chains[1].events[100000], State_E,
          linealpha=0.01, title="E", xguidefontsize=8, yguidefontsize=8,
          xtickfontsize=7, ytickfontsize=7, titlefontsize=11)
for i=100050:50:200000
  plot!(p3, mcmc.markov_chains[1].events[i], State_E, linealpha=0.02)
end

p4 = plot(mcmc.markov_chains[1].events[100000], State_I,
          linealpha=0.01, title="I", xguidefontsize=8, yguidefontsize=8, xtickfontsize=7, ytickfontsize=7, titlefontsize=11)
for i=100050:50:200000
  plot!(p4, mcmc.markov_chains[1].events[i], State_I, linealpha=0.02)
end
plot!(p4, obs, State_I, linecolor=:black, linewidth=1.5) # Show infection observations (day of prodrome)

p5 = plot(mcmc.markov_chains[1].events[100000], State_R,
          linealpha=0.01, title="R", xguidefontsize=8, yguidefontsize=8, xtickfontsize=7, ytickfontsize=7, titlefontsize=11)
for i=100050:50:200000
  plot!(p5, mcmc.markov_chains[1].events[i], State_R, linealpha=0.02)
end
plot!(p5, obs, State_R, linecolor=:black, linewidth=1.5) # Show removal observations (day of appearance of rash + 4)

l = @layout [a b c d]
combinedplots1 = plot(p2, p3, p4, p5, layout=l, link=:y, size=(800,200))
png(combinedplots1, joinpath(@__DIR__, "posterior_epi_curves.png"))


In [41]:
tnp = TransmissionNetworkPosterior(mcmc, burnin=100000, thin=50)


Out[41]:
TransmissionNetworkDistribution Σexternal = 7.7036481759120425 Σinternal = 180.29635182408796

In [13]:
tnoesterle = TransmissionNetwork(BitArray(readdlm("data/oesterle_tn_external.csv")[:]), BitArray(readdlm("data/oesterle_tn_internal.csv")))


Out[13]:
TransmissionNetwork Σexternal = 4 Σinternal = 184

In [14]:
# As there are several individuals at single locations, jitter locations to better illustrate the 
# transmission network distribution
xyjitter = select(risks, :x, :y)
xyjitter[:, :x] = xyjitter[:, :x] + rand(Normal(0, 3), 188)
xyjitter[:, :y] = xyjitter[:, :y] + rand(Normal(0, 3), 188)
plotpop = Population(xyjitter)


Out[14]:
Population object (n=188)

In [15]:
p6 = plot(tnp, plotpop, title="Transmission Network Posterior Distribution", titlefontsize=11, framestyle=:box, markeralpha=0.5, size=(400, 400))

png(p6, joinpath(@__DIR__, "posterior_tn.png"))

In [16]:
p7 = plot(tnoesterle, plotpop, title="Oesterle (1992) Transmission Network", titlefontsize=11, framestyle=:box, markeralpha=0.5, size=(400, 400))

png(p7, joinpath(@__DIR__, "Oesterle_tn.png"))

In [17]:
l = @layout [a b]
combinedplots2 = plot(p6, p7, layout=l, size=(800, 400))
png(combinedplots2, joinpath(@__DIR__, "tn_side_by_side.png"))


In [18]:
l = @layout [grid(1,2){0.6h}
             grid(1,4)]
combinedplots3 = plot(p6, p7, p2, p3, p4, p5, layout=l, size=(1000, 600))
png(combinedplots3, joinpath(@__DIR__, "overview.png"))


In [20]:
# Get a summary of TN-ILM parameters
summary(mcmc, burnin=100000, thin=50)


Out[20]:

7 rows × 4 columns

parametermeanvarCI
StringFloat64Float64Tuple…
1ϵ₁0.001261494.10697e-7(0.000331872, 0.00284014)
2κ₁4.137573.37574(0.600692, 6.87183)
3κ₂1.962940.0331777(1.4777, 2.22672)
4κ₃0.02005521.14699e-5(0.0139816, 0.0271123)
5κ₄0.1952110.00117201(0.13244, 0.269877)
6Ωl₁0.1258538.73529e-5(0.108376, 0.145191)
7Ωr₁0.1209878.10217e-5(0.103854, 0.139071)

In [19]:
p8 = plot(tnp, size=(500,400), title="Transmission network posterior\ndegree distribution", titlefontsize=11)
png(p8, joinpath(@__DIR__, "outdegree.png"))


In [21]:
Pathogen._outdegree(tnoesterle)[45]


Out[21]:
30

In [22]:
Pathogen._outdegree(tnp)[45]


Out[22]:
11.114942528735638

In [23]:
sum(mode(tnp).internal .* tnoesterle.internal) + sum(mode(tnp).external .* tnoesterle.external)


Out[23]:
103

In [24]:
sum(tnp.external)


Out[24]:
7.7036481759120425

In [23]:
pm_events = mean(mcmc.markov_chains[1].events[100000:50:200000])


Out[23]:
SEIR model event times (n=188)

In [35]:
histogram(pm_events.infection .- pm_events.exposure, legend=:none, ylabel="Frequency", xlabel="Latent period")


Out[35]:
6 7 8 9 10 11 12 0 10 20 30 40 Latent period Frequency

In [29]:
mean(pm_events.infection .- pm_events.exposure)


Out[29]:
7.980969853022001

In [34]:
histogram(obs.infection .- pm_events.exposure, legend=:none, ylabel="Frequency", xlabel="Incubation period")


Out[34]:
7 8 9 10 11 12 13 0 10 20 30 Incubation period Frequency

In [30]:
mean(obs.infection .- pm_events.exposure)


Out[30]:
9.418605077939166

In [39]:
argmax(obs.infection)


Out[39]:
141

In [43]:
sort(obs.infection)


Out[43]:
188-element Array{Float64,1}:
  0.0
  2.0
  8.0
  8.0
  9.0
 12.0
 12.0
 13.0
 14.0
 16.0
 16.0
 18.0
 19.0
  ⋮
 42.0
 42.0
 43.0
 43.0
 43.0
 44.0
 44.0
 44.0
 45.0
 46.0
 46.0
 86.0

In [42]:
tnp.external[141]


Out[42]:
1.0

In [ ]: